# Locate the late-integration score files. When running from the project
# root ('hadaca3_framework') they live under ./output/scores/; otherwise
# they sit next to this script and are matched by name.
# basename(getwd()) replaces the manual strsplit()/tail idiom.
if (basename(getwd()) == 'hadaca3_framework') {
  score_files <- list.files(path = "./output/scores/", full.names = TRUE)
} else {
  # score_files <- list.files(pattern = 'score-li*', full.names = TRUE)
  # score_files <- system("find . -maxdepth 1 -type f -name 'score-li*'", intern = TRUE)
  score_files <- dir_ls(".", regexp = "score-li.*")
}

# Parallel backend used by future_map() below; raise workers on larger hosts.
plan(multisession, workers = 4) # workers=25
# plan(sequential)
# Parse one score file: recover the pipeline configuration encoded in its
# file name and bind it (as one metadata row) to the scores read from the
# HDF5 file. Returns NULL when the name does not match the expected pattern
# or the file cannot be read.
process_file <- function(score_file) {
  fname <- basename(score_file)
  parts <- str_match(fname,
"score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+).h5")[2:16]
  # File name does not follow the expected pattern: skip it.
  if (any(is.na(parts))) {
    return(NULL)
  }
  score_tbl <- tryCatch(
    {
      loaded <- read_hdf5(score_file)
      gc()  # HDF5 reads can hold on to memory; reclaim between files
      loaded
    },
    error = function(e) {
      message("Error reading file: ", score_file)
      message(e)
      NULL
    }
  )
  if (is.null(score_tbl)) {
    return(NULL)
  }
  # One-row metadata frame describing the pipeline that produced the scores.
  meta <- data.frame(
    dataset                  = parts[1],
    ref                      = parts[2],
    preprocessing_mixRNA     = parts[3],
    feature_selection_mixRNA = parts[4],
    preprocessing_RNA        = parts[5],
    feature_selection_RNA    = parts[6],
    preprocessing_scRNA      = parts[7],
    feature_selection_scRNA  = parts[8],
    deconvolution_rna        = parts[9],
    preprocessing_mixMET     = parts[10],
    feature_selection_mixMET = parts[11],
    preprocessing_MET        = parts[12],
    feature_selection_MET    = parts[13],
    deconvolution_met        = parts[14],
    late_integration         = parts[15],
    stringsAsFactors = FALSE
  )
  cbind(meta, score_tbl)
}
# Process files in parallel. A failure on any single file yields NULL for
# that entry rather than aborting the whole run.
# results_list <- lapply(score_files, process_file)
safe_process_file <- function(f) {
  tryCatch(process_file(f), error = function(e) NULL)
}
results_list <- future_map(score_files, safe_process_file)

# Names of all pipeline-stage columns produced by process_file(),
# used below for factor releveling and by the downstream report.
all_functions_li <- c(
  'preprocessing_mixRNA', 'feature_selection_mixRNA',
  'preprocessing_RNA', 'feature_selection_RNA',
  'preprocessing_scRNA', 'feature_selection_scRNA',
  'deconvolution_rna',
  'preprocessing_mixMET', 'feature_selection_mixMET',
  'preprocessing_MET', 'feature_selection_MET',
  'deconvolution_met',
  'late_integration'
)
if (length(results_list) != 0) {
  # Stack the per-file rows into one table (NULL entries are dropped by rbind).
  results_li <- do.call(rbind, results_list)

  # Quick overview: median aggregated score per late-integration method
  # (auto-printed when this runs inside a knitr chunk).
  results_li %>%
    # filter(dc==2) %>%
    group_by(late_integration) %>%
    summarise(GlobalScore = median(score_aggreg)) %>%
    arrange(desc(GlobalScore))

  # Median aggregated score per full pipeline configuration.
  # BUGFIX: `.groups` is an argument of summarise(), not group_by();
  # passing it to group_by() data-masked it into a literal column named
  # `.groups` (and triggered the "has grouped output" warning).
  results_li_arrange <- results_li %>%
    group_by(preprocessing_mixRNA, feature_selection_mixRNA,
             preprocessing_RNA, feature_selection_RNA,
             preprocessing_scRNA, feature_selection_scRNA, deconvolution_rna,
             preprocessing_mixMET, feature_selection_mixMET,
             preprocessing_MET, feature_selection_MET, deconvolution_met,
             late_integration) %>%
    summarise(GlobalScore = median(score_aggreg), .groups = "keep") %>%
    arrange(desc(GlobalScore))

  # Optional: freeze dataset/ref factor levels in order of first appearance
  all_data_used <- c('dataset', 'ref')
  for (data_used in all_data_used) {
    results_li[[data_used]] <- factor(results_li[[data_used]],
                                      levels = unique(results_li[[data_used]]))
  }

  # Optional: order function factors by performance on the 'invitro1' dataset.
  # BUGFIX: the subset must be taken BEFORE applying order(); the original
  # applied subset-based indices to the full column, misaligning the levels.
  if ("invitro1" %in% results_li$dataset) {
    is_invitro1 <- results_li$dataset == 'invitro1'
    score_order <- order(results_li$score_aggreg[is_invitro1],
                         decreasing = TRUE)
    for (fun in all_functions_li) {
      results_li[[fun]] <- factor(
        results_li[[fun]],
        levels = unique(results_li[[fun]][is_invitro1][score_order])
      )
    }
  }
} else {
  # No score files parsed: fall back to the bare list of stage names so the
  # downstream is.data.frame() check routes to the "empty" output name.
  results_li <- all_functions_li
}
#> `summarise()` has grouped output by 'preprocessing_mixRNA',
#> 'feature_selection_mixRNA', 'preprocessing_RNA', 'feature_selection_RNA',
#> 'preprocessing_scRNA', 'feature_selection_scRNA', 'deconvolution_rna',
#> 'preprocessing_mixMET', 'feature_selection_mixMET', 'preprocessing_MET',
#> 'feature_selection_MET', 'deconvolution_met', 'late_integration'. You can
#> override using the `.groups` argument.
# dataset_names <- unique(results_li$dataset)
# Derive the output file name from the datasets present in the results,
# or use a fixed fallback when no scores were collected.
if (is.data.frame(results_li)) {
  dataset_tag <- paste(unique(results_li$dataset), collapse = "_")
  dynamic_name_li <- paste0("results_li_", dataset_tag)
} else {
  dynamic_name_li <- "results_li_empty"
}
# Assign dynamically to global environment
# assign("results_li",dynamic_name_li, envir = .GlobalEnv)
# Write gzip-compressed CSV next to the script.
write.csv(results_li,
          file = gzfile(paste0(dynamic_name_li, ".csv.gz")),
          row.names = FALSE)
# results_li = read.csv(file = gzfile("results_li.csv.gz"))
#> Warning in instance$preRenderHook(instance): It seems your data is too big for
#> client-side DataTables. You may consider server-side processing:
#> https://rstudio.github.io/DT/server.html
# Render the meta-analysis report. envir = parent.frame() makes the objects
# built above (results_li, results_li_arrange, all_functions_li, ...)
# visible to the Rmd chunks.
rmarkdown::render(input ='08_metaanalysis.Rmd',envir = parent.frame());
#>
#>
#> processing file: 08_metaanalysis.Rmd
#> 1/72
#> 2/72 [unnamed-chunk-9]
#> 3/72
#> 4/72 [re_loading_pckgs]
#> 5/72
#> 6/72 [load table if they do not exist]
#> 7/72
#> 8/72 [results_li_top10]
#> 9/72
#> 10/72 [fun_arranged_boxplot]
#> 11/72
#> 12/72 [cor_dens_plot]
#> 13/72
#> 14/72 [cor_plot]
#> 15/72
#> 16/72 [stepwise_model]
#> 17/72
#> 18/72 [val_propre_pca]
#> 19/72
#> 20/72 [var_plot_pca]
#> 21/72
#> 22/72 [dim_desc_pca]
#> 23/72
#> 24/72 [new_plot_ellipses]
#> 25/72
#> 26/72 [mean_standardise_results_li_by_dataset]
#> 27/72
#> 28/72 [cor_dens_plot_standardise]
#> 29/72
#> 30/72 [cor_plot_standardise]
#> 31/72
#> 32/72 [stepwise_model_standardise ]
#> 33/72
#> 34/72 [val_propre_pca_standardise]
#> 35/72
#> 36/72 [var_plot_pca_standardise]
#> 37/72
#> 38/72 [dim_desc_pca_standardise]
#> 39/72
#> 40/72 [new_plot_ellipses_standardise]
#> 41/72
#> 42/72 [unnamed-chunk-10]
#> 43/72
#> 44/72 [MFA]
#> 45/72
#> 46/72 [RV_coef]
#> 47/72
#> 48/72 [plot_group]
#> 49/72
#> 50/72 [plot_var]
#> 51/72
#> 52/72 [unnamed-chunk-11]
#> 53/72
#> 54/72 [unnamed-chunk-12]
#> 55/72
#> 56/72 [prepare data]
#> 57/72
#> 58/72 [lm model]
#> 59/72
#> 60/72 [anova]
#> 61/72
#> 62/72 [unnamed-chunk-13]
#> 63/72
#> 64/72 [Convert function-type columns to dummy variables]
#> 65/72
#> 66/72 [Run PCA]
#> 67/72
#> 68/72 [Visualize PCA with Score Overlay ]
#> 69/72
#> 70/72 [contributing components]
#> 71/72
#> 72/72 [pca ]
#> output file: 08_metaanalysis.knit.md
#> /home/github-runner/.conda/envs/hadaca3framework_env/bin/pandoc +RTS -K512m -RTS 08_metaanalysis.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output 08_metaanalysis.html --lua-filter /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmarkdown/lua/latex-div.lua --lua-filter /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmarkdown/lua/table-classes.lua --variable bs3=TRUE --standalone --section-divs --table-of-contents --toc-depth 3 --variable toc_float=1 --variable toc_selectors=h1,h2,h3 --variable toc_collapsed=1 --variable toc_smooth_scroll=1 --variable toc_print=1 --template /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --number-sections --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpJ9kZYd/rmarkdown-str3183ef34b66a69.html
#>
#> Output created: 08_metaanalysis.html







#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).